This report documents how we brought the CDC/ATSDR Social Vulnerability Index (SVI) into our project so we can describe and compare the social conditions of places alongside environmental outcomes. The SVI summarizes 15 census/ACS indicators (16 in the 2020 release) into four themes—(1) Socioeconomic Status, (2) Household Composition & Disability, (3) Minority Status & Language, and (4) Housing Type & Transportation—and provides percentile ranks that indicate how vulnerable a tract or county is relative to all others in the same ranking universe. We use the SVI for 2000, 2010, and 2020, at both the census-tract and county levels, and subset to our seven states of interest (IL, IN, KY, OH, PA, TN, WV). Our goal is to align these social metrics with our other datasets, maintain comparability across time, and export tidy, analysis-ready tables and maps.
We use the CDC/ATSDR Social Vulnerability Index (SVI) releases for 2000, 2010, and 2020, downloaded at both the county and tract levels. All files, data dictionaries, and documentation come from the CDC/ATSDR SVI portal.
## -----------------------------
## 2020 COUNTIES
## -----------------------------
# Folder holding the 2020 county-level SVI download(s).
CDC_SVI_2020_county_dir <- here(
  "Data", "CDC_Social_Vulnerability_Index", "2020_County_Data"
)

# Every CSV beneath that folder, sub-folders included.
CDC_SVI_2020_county_files <- fs::dir_ls(
  CDC_SVI_2020_county_dir,
  regexp = "\\.csv$",
  recurse = TRUE
)

# Stack all files into one table, tag the release year, and split the
# combined state+county code (`stcnty`) into its 2- and 3-character parts.
CDC_SVI_2020_county <- CDC_SVI_2020_county_files %>%
  map_dfr(read_csv, show_col_types = FALSE) %>%
  clean_names() %>%
  mutate(
    year = 2020,
    state_fips = substr(stcnty, 1, 2),
    county_fips = substr(stcnty, 3, 5)
  ) %>%
  # CDC codes missing values as -999; recode to NA in numeric columns only.
  mutate(across(where(is.numeric), function(col) na_if(col, -999))) %>%
  # Drop the identifier columns superseded above, then lead with the keys.
  dplyr::select(-st, -stcnty, -state, -county) %>%
  dplyr::select(st_abbr, state_fips, county_fips, everything())
## -----------------------------
## 2010 COUNTIES
## -----------------------------
# Folder holding the 2010 county-level SVI download(s).
CDC_SVI_2010_county_dir <- here(
  "Data", "CDC_Social_Vulnerability_Index", "2010_County_Data"
)

# Every CSV beneath that folder, sub-folders included.
CDC_SVI_2010_county_files <- fs::dir_ls(
  CDC_SVI_2010_county_dir,
  regexp = "\\.csv$",
  recurse = TRUE
)

# Stack all files, tag the release year, and split the combined county
# code (`fips`) into its 2-character state and 3-character county parts.
CDC_SVI_2010_county <- CDC_SVI_2010_county_files %>%
  map_dfr(read_csv, show_col_types = FALSE) %>%
  clean_names() %>%
  mutate(
    year = 2010,
    state_fips = substr(fips, 1, 2),
    county_fips = substr(fips, 3, 5)
  ) %>%
  # CDC codes missing values as -999; recode to NA in numeric columns only.
  mutate(across(where(is.numeric), function(col) na_if(col, -999))) %>%
  # Drop superseded columns; the 2010 files call the abbreviation `st`,
  # so rename it to `st_abbr` to match the other years.
  dplyr::select(-state, -fips) %>%
  dplyr::select(st_abbr = st, state_fips, county_fips, everything())
## -----------------------------
## 2000 COUNTIES
## -----------------------------
# Folder holding the 2000 county-level SVI download(s).
CDC_SVI_2000_county_dir <- here("Data", "CDC_Social_Vulnerability_Index",
                                "2000_County_Data")
# Every CSV beneath that folder, sub-folders included.
CDC_SVI_2000_county_files <- fs::dir_ls(CDC_SVI_2000_county_dir,
                                        regexp = "\\.csv$", recurse = TRUE)

# Stack all files, tag the release year, and normalise the FIPS components.
CDC_SVI_2000_county <-
  map_dfr(CDC_SVI_2000_county_files, read_csv, show_col_types = FALSE) %>%
  clean_names() %>%
  mutate(
    year = 2000,
    # Zero-pad to fixed width so the codes line up with the 2010/2020 tables
    # (a bare as.character() turns a numeric 3 into "3", not "003").
    # as.integer() accepts both numeric and character input; NA stays NA.
    county_fips = dplyr::if_else(
      is.na(cnty_fips), NA_character_,
      sprintf("%03d", as.integer(cnty_fips))
    ),
    state_fips = dplyr::if_else(
      is.na(state_fips), NA_character_,
      sprintf("%02d", as.integer(state_fips))
    )
  ) %>%
  # CDC: treat -999 as missing, but ONLY for numeric columns
  mutate(across(where(is.numeric), ~ na_if(.x, -999))) %>%
  # Drop superseded columns; the 2000 files call the abbreviation
  # `state_abbr`, so rename it to `st_abbr` to match the other years.
  dplyr::select(-county, -stcofips, -cnty_fips) %>%
  dplyr::select(st_abbr = state_abbr, state_fips, county_fips, everything())
#print(CDC_SVI_2020_county)
#print(CDC_SVI_2010_county)
#print(CDC_SVI_2000_county)
library(dplyr)
library(janitor)
library(gt)
library(scales)
#' Bin 0–1 percentile ranks into four CDC-style quartile classes
#'
#' @param x Numeric vector of percentile ranks.
#' @return A factor the same length as `x` with the four levels
#'   "0–0.25 (Least)", "0.25–0.50", "0.50–0.75" and "0.75–1.00 (Most)".
#'   Bins are closed on the right (0.25 belongs to the first class). The
#'   outer bins are open-ended, so values below 0 fall in the first class
#'   and values above 1 in the last; `NA` stays `NA`.
quartile_lab <- function(x) {
  class_labels <- c(
    "0–0.25 (Least)",
    "0.25–0.50",
    "0.50–0.75",
    "0.75–1.00 (Most)"
  )
  upper_edges <- c(0.25, 0.50, 0.75, Inf)
  cut(x, breaks = c(-Inf, upper_edges), labels = class_labels, right = TRUE)
}
# County-level ranking table for 2020: keeps counties with a valid overall
# percentile, re-ranks them within each state, and attaches quartile labels.
#
# NOTE(review): in the rendered table every state's top county shows
# rpl_themes = 1.000 and svi_nat is (almost) identical to svi_state. That
# pattern suggests the input CSVs may be per-state downloads whose
# RPL_THEMES is already ranked within state rather than nationally —
# confirm before describing `svi_nat` as a national percentile.
county_2020_ranked <-
  CDC_SVI_2020_county %>%
  clean_names() %>%
  # Keep valid percentiles only. -999 was already recoded to NA on import;
  # this comparison drops those NA rows plus anything outside [0, 1].
  filter(rpl_themes >= 0, rpl_themes <= 1) %>%
  # (The former `geoid` column was removed: it was dropped again by the
  # final select() and relied on "%02s" zero padding, which is
  # platform-dependent in sprintf().)
  mutate(svi_nat = rpl_themes) %>%
  group_by(st_abbr) %>%
  mutate(svi_state = percent_rank(svi_nat)) %>% # re-rank within state
  ungroup() %>%
  mutate(
    nat_quart = quartile_lab(svi_nat),
    state_quart = quartile_lab(svi_state)
  ) %>%
  dplyr::select(
    st_abbr, county_fips, location,
    svi_nat, svi_state, nat_quart, state_quart
  )
# Show the 25 counties with the highest overall SVI percentile as a gt table.
# Ties at the cut-off are broken arbitrarily (with_ties = FALSE) so the table
# always has exactly 25 rows.
county_2020_ranked %>%
  dplyr::slice_max(order_by = svi_nat, n = 25, with_ties = FALSE) %>%
  gt::gt() %>%
  gt::fmt_number(columns = c(svi_nat, svi_state), decimals = 3) %>%
  # Label every column (svi_nat / nat_quart were previously shown with their
  # raw names, unlike the tract-level table).
  gt::cols_label(
    st_abbr = "State",
    county_fips = "County FIPS",
    location = "County",
    svi_nat = "Overall SVI (national)",
    svi_state = "Overall SVI (state-based)",
    nat_quart = "Nat. quartile",
    state_quart = "State quartile"
  ) %>%
  gt::tab_header(
    title = gt::md("**Between-county Differences in Overall SVI (2020)**"),
    subtitle = gt::md("National vs. within-state percentiles & CDC-style quartiles")
  ) %>%
  gt::tab_source_note(
    gt::md("Notes: `RPL_THEMES` is the CDC national percentile rank (0–1). `svi_state` is re-ranked within state (0–1).")
  )
| Between-county Differences in Overall SVI (2020) | ||||||
| National vs. within-state percentiles & CDC-style quartiles | ||||||
| State | County FIPS | County | svi_nat | Overall SVI (state-based) | nat_quart | State quartile |
|---|---|---|---|---|---|---|
| IL | 003 | Alexander County, Illinois | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IN | 097 | Marion County, Indiana | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| KY | 075 | Fulton County, Kentucky | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| OH | 007 | Ashtabula County, Ohio | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| PA | 101 | Philadelphia County, Pennsylvania | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| TN | 097 | Lauderdale County, Tennessee | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| WV | 055 | Mercer County, West Virginia | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| KY | 047 | Christian County, Kentucky | 0.992 | 0.992 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IL | 165 | Saline County, Illinois | 0.990 | 0.990 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| TN | 095 | Lake County, Tennessee | 0.989 | 0.989 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IN | 039 | Elkhart County, Indiana | 0.989 | 0.989 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| OH | 023 | Clark County, Ohio | 0.989 | 0.989 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| PA | 077 | Lehigh County, Pennsylvania | 0.985 | 0.985 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| KY | 095 | Harlan County, Kentucky | 0.983 | 0.983 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| WV | 047 | McDowell County, West Virginia | 0.982 | 0.981 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IL | 201 | Winnebago County, Illinois | 0.980 | 0.980 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| TN | 061 | Grundy County, Tennessee | 0.979 | 0.979 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IN | 041 | Fayette County, Indiana | 0.978 | 0.978 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| OH | 113 | Montgomery County, Ohio | 0.977 | 0.977 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| KY | 065 | Estill County, Kentucky | 0.975 | 0.975 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IL | 077 | Jackson County, Illinois | 0.970 | 0.970 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| PA | 049 | Erie County, Pennsylvania | 0.970 | 0.970 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| TN | 069 | Hardeman County, Tennessee | 0.968 | 0.968 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IN | 177 | Wayne County, Indiana | 0.967 | 0.967 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| KY | 051 | Clay County, Kentucky | 0.966 | 0.966 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
Notes: RPL_THEMES is the CDC national percentile rank (0–1). svi_state is re-ranked within state (0–1).. |
||||||
## -----------------------------
## 2020 TRACTS
## -----------------------------
# Folder holding the 2020 tract-level SVI download(s).
CDC_SVI_2020_census_tract_dir <- here(
  "Data", "CDC_Social_Vulnerability_Index", "2020_Census_Tract_Data"
)
# Every CSV beneath that folder, sub-folders included.
CDC_SVI_2020_census_tract_files <- fs::dir_ls(
  CDC_SVI_2020_census_tract_dir,
  regexp = "\\.csv$", recurse = TRUE
)

# Stack all files, tag the release year, and build the join keys.
CDC_SVI_2020_census_tract <-
  map_dfr(CDC_SVI_2020_census_tract_files, read_csv, show_col_types = FALSE) %>%
  clean_names() %>%
  mutate(
    year = 2020,
    # Split the combined state+county code. The former sprintf("%02s", ...)
    # re-padding was removed: the "0" flag is platform-dependent for %s
    # (it pads with spaces on some systems) and substr() already returns
    # fixed-width pieces here.
    # NOTE(review): assumes `stcnty` keeps all 5 digits; true for the seven
    # study states (FIPS >= 17) even if the column is parsed as a number.
    state_fips = substr(stcnty, 1, 2),
    county_fips = substr(stcnty, 3, 5),
    # 11-digit tract GEOID. as.numeric() lets this work whether `fips` was
    # parsed as a number or as text; without a `fips` column, fall back to
    # pasting the components together.
    geoid = if ("fips" %in% names(.)) {
      sprintf("%011.0f", as.numeric(fips))
    } else {
      paste0(state_fips, county_fips, tract)
    },
    # Column-presence fallbacks ("some files have both/one"). `%||%` cannot
    # do this inside mutate(): a missing column raises an error instead of
    # yielding NULL, so the fallback was never reached.
    state = if ("state" %in% names(.)) state else state_name,
    st_abbr = if ("st_abbr" %in% names(.)) st_abbr else state
  ) %>%
  # CDC: treat -999 as missing, but ONLY for numeric columns
  mutate(across(where(is.numeric), ~ na_if(.x, -999))) %>%
  dplyr::select(
    state, st_abbr, state_fips, county, county_fips, geoid, year,
    everything(), -st, -stcnty
  )
## -----------------------------
## 2010 TRACTS
## -----------------------------
# Folder holding the 2010 tract-level SVI download(s).
CDC_SVI_2010_census_tract_dir <- here(
  "Data", "CDC_Social_Vulnerability_Index", "2010_Census_Tract_Data"
)
# Every CSV beneath that folder, sub-folders included.
CDC_SVI_2010_census_tract_files <- fs::dir_ls(
  CDC_SVI_2010_census_tract_dir,
  regexp = "\\.csv$", recurse = TRUE
)

# Stack all files, tag the release year, and build the join keys.
CDC_SVI_2010_census_tract <-
  map_dfr(CDC_SVI_2010_census_tract_files, read_csv, show_col_types = FALSE) %>%
  clean_names() %>%
  mutate(
    year = 2010,
    # Zero-pad via an integer conversion: "%02s"/"%03s" is platform-dependent
    # (spaces instead of zeros on some systems), whereas "%0Nd" is well
    # defined. as.integer() accepts numeric and character input; NA stays NA.
    state_fips = dplyr::if_else(
      is.na(state_fips), NA_character_,
      sprintf("%02d", as.integer(state_fips))
    ),
    county_fips = dplyr::if_else(
      is.na(cnty_fips), NA_character_,
      sprintf("%03d", as.integer(cnty_fips))
    ),
    # 11-digit tract GEOID. as.numeric() lets this work whether `fips` was
    # parsed as a number or as text; without a `fips` column, fall back to
    # pasting the components together.
    geoid = if ("fips" %in% names(.)) {
      sprintf("%011.0f", as.numeric(fips))
    } else {
      paste0(state_fips, county_fips, tract)
    },
    # Harmonise column names with the 2020 table.
    st_abbr = state_abbr,
    state = state_name
  ) %>%
  # CDC: treat -999 as missing, but ONLY for numeric columns
  mutate(across(where(is.numeric), ~ na_if(.x, -999))) %>%
  dplyr::select(
    state, st_abbr, state_fips, county, county_fips, geoid, year,
    everything(), -stcofips, -cnty_fips
  )
## -----------------------------
## 2000 TRACTS
## -----------------------------
# Folder holding the 2000 tract-level SVI download(s).
CDC_SVI_2000_census_tract_dir <- here(
  "Data", "CDC_Social_Vulnerability_Index", "2000_Census_Tract_Data"
)
# Every CSV beneath that folder, sub-folders included.
CDC_SVI_2000_census_tract_files <- fs::dir_ls(
  CDC_SVI_2000_census_tract_dir,
  regexp = "\\.csv$", recurse = TRUE
)

# Stack all files, tag the release year, and build the join keys.
CDC_SVI_2000_census_tract <-
  map_dfr(CDC_SVI_2000_census_tract_files, read_csv, show_col_types = FALSE) %>%
  clean_names() %>%
  mutate(
    year = 2000,
    # Zero-pad via an integer conversion so the result does not depend on
    # how read_csv() typed the columns: "%02d" errors on character input and
    # "%03s" pads with spaces on some platforms. NA stays NA.
    state_fips = dplyr::if_else(
      is.na(state_fips), NA_character_,
      sprintf("%02d", as.integer(state_fips))
    ),
    county_fips = dplyr::if_else(
      is.na(cnty_fips), NA_character_,
      sprintf("%03d", as.integer(cnty_fips))
    ),
    # NOTE(review): assumes `tract` already holds the full tract code as it
    # should appear in the GEOID; if it was parsed as a number, leading
    # zeros would be lost — confirm against the 2000 data dictionary.
    geoid = paste0(state_fips, county_fips, tract),
    # Harmonise column names with the 2020 table.
    st_abbr = state_abbr,
    state = state_name
  ) %>%
  # CDC: treat -999 as missing, but ONLY for numeric columns (this step was
  # missing for the 2000 tracts although every other table applies it).
  mutate(across(where(is.numeric), ~ na_if(.x, -999))) %>%
  dplyr::select(
    state, st_abbr, state_fips, county, county_fips, geoid, year,
    everything(), -stcofips, -cnty_fips
  )
#print(CDC_SVI_2020_census_tract)
#print(CDC_SVI_2010_census_tract)
#print(CDC_SVI_2000_census_tract)

# Tract-level ranking table for 2020: keeps tracts with a valid overall
# percentile, re-ranks them within each state, and attaches quartile labels.
# (The assignment below used to be fused onto the commented-out print() line
# above, which turned it into a comment and left `tract_2020_ranked`
# undefined.)
#
# NOTE(review): in the rendered table svi_nat and svi_state are (almost)
# identical and several states top out at 1.000, which suggests RPL_THEMES
# in these files may already be ranked within state — confirm before
# describing `svi_nat` as a national percentile.
tract_2020_ranked <-
  CDC_SVI_2020_census_tract %>%
  clean_names() %>%
  # keep only valid overall SVI percentiles
  filter(!is.na(rpl_themes), dplyr::between(rpl_themes, 0, 1)) %>%
  mutate(
    # fall back to the `state` column where the abbreviation is missing
    st_abbr = coalesce(st_abbr, state),
    svi_nat = rpl_themes # percentile as supplied in the CDC file
  ) %>%
  group_by(st_abbr) %>%
  mutate(svi_state = percent_rank(svi_nat)) %>% # re-rank within state
  ungroup() %>%
  mutate(
    nat_quart = quartile_lab(svi_nat),
    state_quart = quartile_lab(svi_state)
  ) %>%
  dplyr::select(
    st_abbr, county, location,
    svi_nat, svi_state, nat_quart, state_quart
  )
# Show the 25 tracts with the highest overall SVI percentile as a gt table.
# (The rendered table title used to be fused onto the closing parenthesis of
# this pipeline, which made the chunk unparsable; it is now on its own line.)
tract_2020_ranked %>%
  arrange(desc(svi_nat)) %>%
  slice_head(n = 25) %>%
  gt() %>%
  fmt_number(columns = c(svi_nat, svi_state), decimals = 3) %>%
  cols_label(
    st_abbr = "State",
    county = "County",
    location = "Location",
    svi_nat = "Overall SVI (national)",
    svi_state = "Overall SVI (state-based)",
    nat_quart = "Nat. quartile",
    state_quart = "State quartile"
  ) %>%
  tab_header(
    title = md("**Between-tract Differences in Overall SVI (2020)**"),
    subtitle = md("National vs. within-state percentiles & CDC-style quartiles")
  ) %>%
  tab_source_note(
    md("Notes: `RPL_THEMES` is the CDC national percentile rank (0–1). `svi_state` is re-ranked within state (0–1).")
  )
| Between-tract Differences in Overall SVI (2020) | ||||||
| National vs. within-state percentiles & CDC-style quartiles | ||||||
| State | County | Location | Overall SVI (national) | Overall SVI (state-based) | Nat. quartile | State quartile |
|---|---|---|---|---|---|---|
| IL | Lake | Census Tract 8623, Lake County, Illinois | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IN | Lake | Census Tract 310, Lake County, Indiana | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| KY | Jefferson | Census Tract 30, Jefferson County, Kentucky | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| OH | Franklin | Census Tract 51, Franklin County, Ohio | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| PA | Berks | Census Tract 25, Berks County, Pennsylvania | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| TN | Shelby | Census Tract 105, Shelby County, Tennessee | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| WV | Monongalia | Census Tract 101.03, Monongalia County, West Virginia | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IL | Cook | Census Tract 2603, Cook County, Illinois | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| OH | Franklin | Census Tract 26, Franklin County, Ohio | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| PA | York | Census Tract 12, York County, Pennsylvania | 1.000 | 1.000 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IL | Cook | Census Tract 8386, Cook County, Illinois | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IN | Elkhart | Census Tract 26, Elkhart County, Indiana | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| OH | Mahoning | Census Tract 8141, Mahoning County, Ohio | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| PA | Allegheny | Census Tract 1209, Allegheny County, Pennsylvania | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| TN | Shelby | Census Tract 8, Shelby County, Tennessee | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| KY | Boone | Census Tract 703.01, Boone County, Kentucky | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IL | Lake | Census Tract 8626.05, Lake County, Illinois | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| PA | Philadelphia | Census Tract 176.01, Philadelphia County, Pennsylvania | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| OH | Cuyahoga | Census Tract 1976, Cuyahoga County, Ohio | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IL | Cook | Census Tract 3016, Cook County, Illinois | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IN | Jackson | Census Tract 9679.01, Jackson County, Indiana | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| PA | Erie | Census Tract 13, Erie County, Pennsylvania | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| TN | Hamblen | Census Tract 1001, Hamblen County, Tennessee | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| OH | Hamilton | Census Tract 68, Hamilton County, Ohio | 0.999 | 0.999 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
| IL | Cook | Census Tract 8293.02, Cook County, Illinois | 0.999 | 0.998 | 0.75–1.00 (Most) | 0.75–1.00 (Most) |
Notes: RPL_THEMES is the CDC national percentile rank (0–1). svi_state is re-ranked within state (0–1). |
||||||
# Study area (USPS abbreviations) and the four fill colours used for the
# quartile classes, ordered from least to most vulnerable.
states_of_interest <- c("IL", "IN", "KY", "OH", "PA", "TN", "WV")
cdc_cols <- c("#c8e6c9", "#fff9c4", "#ffcc80", "#ef5350")
# ----------------------------
# 1) County polygons
# ----------------------------
# 2020 cartographic-boundary county polygons, limited to the study states
# and re-projected to EPSG:4326.
counties_sf <- tigris::counties(cb = TRUE, year = 2020, class = "sf") %>%
  janitor::clean_names() %>% # clean first
  dplyr::filter(stusps %in% states_of_interest) %>%
  dplyr::mutate(
    state_fips = statefp, # the 2-digit FIPS
    county_fips = countyfp, # the 3-digit FIPS
    # 5-character county key. paste0() replaces sprintf("%02s%03s", ...),
    # whose zero padding of strings is platform-dependent (spaces on some
    # systems).
    # NOTE(review): assumes statefp/countyfp are already zero-padded
    # strings — confirm.
    geoid = paste0(state_fips, county_fips)
  ) %>%
  sf::st_transform(4326)
# Keep just the columns needed for mapping and build the 5-character county
# key. state_fips / county_fips are created as fixed-width strings on import,
# so paste0() is enough; sprintf("%02s%03s", ...) was replaced because zero
# padding of strings is platform-dependent.
county_2020 <- CDC_SVI_2020_county %>%
  dplyr::filter(year == 2020) %>%
  dplyr::mutate(geoid = paste0(state_fips, county_fips)) %>%
  dplyr::select(geoid, state = st_abbr, county = county_fips, year, rpl_themes)

# Attach the SVI values to the polygons. Counties without a match keep NA,
# and -999 was already recoded to NA on import, so both stay unclassified.
# NOTE: `rpl_quint` holds the four *quartile* classes from quartile_lab();
# the name is kept because the plotting code refers to it.
county_2020_map <- counties_sf %>%
  dplyr::left_join(county_2020, by = "geoid") %>%
  dplyr::mutate(rpl_quint = quartile_lab(rpl_themes))
# County choropleth of the overall 2020 SVI, filled by the four classes
# produced by quartile_lab() (stored in `rpl_quint`).
p_svi_2020 <- ggplot(county_2020_map) +
  geom_sf(aes(fill = rpl_quint), color = NA) +
  # drop = FALSE keeps all four classes in the legend even if one is empty
  scale_fill_manual(
    values = cdc_cols, drop = FALSE,
    name = "Overall SVI\n(percentile)"
  ) +
  labs(
    title = "CDC Social Vulnerability Index (Overall, 2020)",
    # four classes = quartiles (the subtitle previously said "quintile")
    subtitle = "Classed into CDC-style quartile bins",
    caption = "Source: CDC SVI 2020 • Projection: WGS84 / EPSG:4326"
  ) +
  theme_minimal(base_size = 10) +
  theme(
    legend.position = "bottom",
    panel.grid = element_blank(),
    axis.title = element_blank(),
    # transparent backgrounds so the figure can sit on any page colour
    panel.background = element_rect(fill = NA, colour = NA),
    plot.background = element_rect(fill = NA, colour = NA),
    legend.background = element_rect(fill = NA, colour = NA)
  )

# Render in the report, then save a copy. These two calls used to be fused
# on one line ("print(p_svi_2020)ggsave("), which is a syntax error.
print(p_svi_2020)
# NOTE(review): the file name still says "quintiles" although the map shows
# quartiles; it is left unchanged so existing references to the figure keep
# working.
ggsave(
  here::here("Output", "Figures", "CDC_SVI_overall_2020_county_quintiles.png"),
  p_svi_2020, width = 7.5, height = 6, dpi = 600
)
# 2020 cartographic-boundary tract polygons: one download per study state,
# row-bound into a single layer, keyed by an 11-character GEOID, and
# re-projected to EPSG:4326.
tracts_sf <- states_of_interest %>%
  map_dfr(
    function(st_code) {
      tigris::tracts(cb = TRUE, year = 2020, state = st_code, class = "sf")
    }
  ) %>%
  janitor::clean_names() %>%
  mutate(
    state_fips = statefp,   # state part of the key
    county_fips = countyfp, # county part of the key
    tract_code = tractce,   # tract part of the key
    geoid = paste0(state_fips, county_fips, tract_code)
  ) %>%
  sf::st_transform(4326)
# 2020 tract SVI reduced to the join key and its quartile class.
# Rows whose overall percentile is missing or outside [0, 1] are removed
# here (not recoded), so those tracts end up with NA after the join below.
tract_2020 <- CDC_SVI_2020_census_tract %>%
  filter(year == 2020) %>%
  filter(!is.na(rpl_themes)) %>%
  filter(rpl_themes >= 0 & rpl_themes <= 1) %>%
  # `geoid` was already created on import; keep it as is
  dplyr::transmute(geoid, rpl_quart = quartile_lab(rpl_themes))

# Attach the classes to the tract polygons (all polygons are kept).
tract_2020_map <- left_join(tracts_sf, tract_2020, by = "geoid")
# Tract choropleth of the overall 2020 SVI quartile.
# Only tracts with a quartile class are drawn. The previous
# `na.omit(rpl_quart)` did not do that: na.omit() ignores extra arguments
# and drops rows with NA in *any* atomic column, so tracts could disappear
# because of unrelated missing attributes.
ggplot(tract_2020_map %>% dplyr::filter(!is.na(rpl_quart))) +
  geom_sf(aes(fill = rpl_quart), color = NA) +
  scale_fill_manual(
    values = cdc_cols,
    na.value = "grey95", # unused while NA rows are filtered out above
    drop = FALSE,        # keep all four classes in the legend
    name = "Overall SVI\n(quartile)"
  ) +
  labs(
    title = "CDC Social Vulnerability Index (2020)",
    subtitle = "Census tracts classed into CDC-style quartiles",
    caption = "Source: CDC SVI 2020 • Projection: WGS84 / EPSG:4326"
  ) +
  theme_minimal(base_size = 10) +
  theme(
    legend.position = "bottom",
    panel.grid = element_blank(),
    axis.title = element_blank(),
    panel.background = element_rect(fill = NA, colour = NA)
  )